##9/19/2015
##Replication_Code_6
## This script reproduces Fig 6
##-- burstiness of Speakers vs their number of speeches 
## (in terms of rank in Commons)

# Workspace setup.
# NOTE: both calls below are session-wide side effects: the global environment
# is cleared and the working directory is moved to the data folder, so every
# file name used afterwards is resolved relative to "./../data".
rm(list = ls())
setwd("./../data") # originally: C:/Users/as9934/Dropbox/HansardProject/burstiness/bjps_replication/data

# Get panel data (the workspace was just cleared, so `panel.data` used below
# comes from this file).
load("paneldata_all.rdata")

# Remove anyone who makes _no_ speeches.
panel.data <- panel.data[panel.data$speeches > 0, ]

# Get sessions: the sorted unique session labels, as character strings.
sup <- as.character(sort(unique(panel.data$session)))

# Get the speakers.
spd <- read.csv("speaker_data.csv")

# Per-speaker summary columns, filled in row by row further down.
# They are created explicitly as NA: assigning `c()` (i.e. NULL) to a data
# frame column creates nothing, and the first element-wise assignment would
# then silently recycle the first speaker's value into every row.
spd$mean.burst <- rep(NA_real_, nrow(spd))
spd$mean.speech <- rep(NA_real_, nrow(spd))

# Record where each speaker was in the cdf (burstiness and speeches).
cdf_b <- numeric(0)
cdf_s <- numeric(0)

# For each speaker: take the run of sessions from his start session to his end
# session, then compute
#   * his mean burstiness and mean number of speeches over those sessions, and
#   * where those means fall in the empirical cdf of all panel rows from the
#     same sessions.
# Results go to spd$mean.burst, spd$mean.speech, cdf_b and cdf_s (one entry per
# row of `spd`, in row order).

n.speakers <- length(spd$mp_id)

# Preallocate instead of growing the vectors on every iteration.
mean.burst <- rep(NA_real_, n.speakers)
mean.speech <- rep(NA_real_, n.speakers)
cdf_b <- rep(NA_real_, n.speakers)
cdf_s <- rep(NA_real_, n.speakers)

# seq_len() yields an empty loop when there are no speakers (1:0 would not).
for (i in seq_len(n.speakers)) {

  # Positions of the speaker's first and last session within `sup`.
  first.sess <- which(sup == spd$start.sess[i])
  last.sess <- which(sup == spd$end.sess[i])
  if (length(first.sess) != 1L || length(last.sess) != 1L) {
    stop("start/end session of speaker in row ", i,
         " not found exactly once among the panel sessions", call. = FALSE)
  }

  pan.dates <- sup[first.sess:last.sess]
  sub <- panel.data[panel.data$session %in% pan.dates, ]

  # Rows of the window that belong to this speaker (which() drops NA matches).
  own.rows <- which(sub$member_id == spd$mp_id[i])

  mean.burst[i] <- mean(sub$burstiness.actual[own.rows])
  mean.speech[i] <- mean(sub$speeches[own.rows])

  # Everyone else: where was the speaker (in terms of his mean) in the
  # distribution over this period?
  cdf_b[i] <- ecdf(sub$burstiness.actual)(mean.burst[i])
  cdf_s[i] <- ecdf(sub$speeches)(mean.speech[i])
}

# Store the per-speaker means as whole columns in one step.
spd$mean.burst <- mean.burst
spd$mean.speech <- mean.speech

# Fig 6: each speaker's position in the cdf of speeches ("speechiness") and of
# burstiness, plotted in the order the speakers appear in the data.

# Axis labels, one per speaker, in the same order as cdf_s / cdf_b.
short.names <- c("Abercromby", "Lefevre", "Denison", "Brand", "W Peel",  "Gully",  "Lowther")

# The labels are hard-coded, so make a mismatch with the data an explicit error.
if (length(cdf_s) != length(short.names) || length(cdf_b) != length(short.names)) {
  stop("number of speakers (", length(cdf_s), " / ", length(cdf_b),
       ") does not match the ", length(short.names), " axis labels",
       call. = FALSE)
}

x.pos <- seq_along(short.names)

par(bg = "cornsilk1", mfrow = c(1, 1))

# Speechiness: filled circles joined by a solid line; axes are drawn by hand
# below so the x axis can carry the speaker names.
plot(x.pos, cdf_s, ylim = c(0.7, 1), xlab = "", type = "b", pch = 21,
     axes = FALSE, ylab = "", col = "black", bg = "darkgreen", cex = 2)
axis(1, at = x.pos, labels = short.names)
axis(2)
box()

# Burstiness: filled squares joined by a thicker dashed line.
points(x.pos, cdf_b, type = "b", pch = 22, col = "black", bg = "pink",
       cex = 2, lty = 2, lwd = 2)

legend("bottomright", pch = c(21, 22), pt.bg = c("darkgreen", "pink"),
       col = c("black", "black"),
       legend = c("speechiness", "burstiness"), pt.cex = c(2, 2))
